
* ***************************************************** * 
*  	DATA PREPARATION                                    * 
*  	                                                    * 
* 	Input Files:  - PresidentialElection.dta            *
*                 - mig1910.dta                         *
*                 - class2015.dta                       *
*                 - ICPSR.dta                           *
*                 - CCES16_Common_OUTPUT_Feb2018_VV.dta *
*                                                       *
*   Data Outputs: - 1984_2016_wide.dta                  * 
*                 - ICPSR.dta                           *
*                 - DataAll.dta                         *
*                 - GerDominanceIndic.dta               *
*                 - IndividualData.dta                  *
* ****************************************************  * 


* Load the presidential election data into memory (replaces any data in memory)
use "/Users/Klara/Desktop/Project DH TG/REPLICATION/replicationES/PresidentialElection.dta", clear


* Turn the string variable jewish into a numeric variable.
* Three rows are first overwritten with "." (addressed by observation number);
* afterwards the whole column is parsed with real() and the numeric copy takes
* over the name jewish.
* NOTE(review): the observation numbers only hit the intended rows as long as
* the input file keeps its current sort order -- confirm before re-sorting.
foreach row of numlist 14471 17624 20777 {
    replace jewish = "." in `row'
}
generate na = real(jewish)
drop jewish
rename na jewish


* Recompute the vote shares (in percent of totalvotes) for the Democratic,
* Republican and other candidates; the versions shipped with the data are
* dropped and rebuilt from the raw vote counts.
foreach party in dem rep other {
    drop `party'percent
    generate `party'percent = (100 / totalvotes) * `party'votes
}

* Move the identifying and election variables to the front of the dataset
order yearvote fips state stateabb county winner totalvotes ///
    demcandidate demvotes dempercent ///
    repcandidate repvotes reppercent ///
    highestother othervotes otherpercent



*** ANCESTRY VARIABLES ***

* Ancestry shares in percent of totalpopulation.
*   <Group>_ancestry_share is built from <group>total
*   <Group>_share_single   is built from <group>single
* The variables are created in the same order as before so that the column
* order of the dataset does not change.

* Groups whose source variables follow the regular <group>total/<group>single
* naming. African has no single-ancestry count, so only the total share exists.
foreach grp in German American African English French Irish Italian Polish {
    local src = lower("`grp'")

    generate `grp'_ancestry_share = 100 / totalpopulation * `src'total
    label var `grp'_ancestry_share "`grp' ancestry share"

    if "`grp'" != "African" {
        generate `grp'_share_single = 100 / totalpopulation * `src'single
        label var `grp'_share_single "`grp' single ancestry share"
    }
}

* Scandinavian: the source variables are spelled scandivian* in the data and
* the single-ancestry share uses the short prefix Scan
generate Scandinavian_ancestry_share = 100 / totalpopulation * scandiviantotal
label var Scandinavian_ancestry_share "Scandinavian ancestry share"

generate Scan_share_single = 100 / totalpopulation * scandiviansingle
label var Scan_share_single "Scandinavian single ancestry share"

* Hispanic (no single-ancestry count exists) followed by the individual
* Hispanic ancestries, all computed from their *total counts
foreach grp in Hispanic Mexican Puertorican Cuban {
    local src = lower("`grp'")

    generate `grp'_ancestry_share = 100 / totalpopulation * `src'total
    label var `grp'_ancestry_share "`grp' ancestry share"
}



******** AMERICAN SOUTH  ********

* Dummy variable for counties that are in the American South:
*   Alabama, Arkansas, Florida, Georgia, Kentucky, Louisiana, Mississippi,
*   North Carolina, Oklahoma, South Carolina, Tennessee, Texas, Virginia,
*   and West Virginia.
* Every other observation (including one with an empty stateabb) is coded 0.
*
* West Virginia is matched as "wv". The condition used to test for "wn",
* which is not the postal abbreviation of West Virginia, so those counties
* were left at 0.
* NOTE(review): confirm the coding with -tabulate stateabb- in case the data
* use a non-standard abbreviation.
*
* inlist() accepts at most 10 arguments when they are strings, hence the list
* of 14 states is split over two calls.
gen dummy_american_south = 0
label var dummy_american_south "Counties in the American South"
replace dummy_american_south = 1 if ///
    inlist(stateabb, "al", "ar", "fl", "ga", "ky", "la", "ms") | ///
    inlist(stateabb, "nc", "ok", "sc", "tn", "tx", "va", "wv")
label define south 1 "American South" 0 "American North"
label values dummy_american_south south


******** Differences between Rep/ Dem  ********

* Republican minus Democratic vote share, for every election year in the data
generate DiffRepDem = reppercent - dempercent
	

	
*** DOMINANCE OF ANCESTRIES ***

    * for SINGLE ancestries

* Total of the eight single-ancestry shares
generate AncestryShareSum = German_share_single + American_share_single ///
    + English_share_single + French_share_single + Irish_share_single ///
    + Italian_share_single + Polish_share_single + Scan_share_single

* Rescale every single-ancestry share so that the eight of them add up to one
* (the share is first expressed relative to a total of 100 and then divided
* by 100, i.e. the result lies on a 0-1 scale rather than 0-100%)
foreach anc in German American English French Irish Italian Polish Scan {
    generate `anc'Sum100 = ((100 / AncestryShareSum) * `anc'_share_single) / 100
}

* Scratch variable holding the sum of the rescaled shares; it is created for
* manual inspection and removed again right away
generate test1 = GermanSum100 + AmericanSum100 + EnglishSum100 ///
    + FrenchSum100 + IrishSum100 + ItalianSum100 + PolishSum100 + ScanSum100
drop test1



******** generate a dummy for the highest ancestry ********

* <Group>FirstHighest is
*   0        if the group's single-ancestry share is below at least one other
*   1        if it is strictly above all seven other shares
*   missing  otherwise (e.g. when it ties with the largest other share)
* The conditions are assembled from the same pairwise comparisons that were
* previously written out by hand.
local anc_groups German American English French Irish Italian Polish Scan

foreach g of local anc_groups {
    * the seeds 0 and 1 are neutral for | and & and only simplify the assembly
    local below_any 0
    local above_all 1

    foreach o of local anc_groups {
        if "`o'" != "`g'" {
            local below_any `below_any' | `g'_share_single < `o'_share_single
            local above_all `above_all' & `g'_share_single > `o'_share_single
        }
    }

    generate `g'FirstHighest = .
    replace `g'FirstHighest = 0 if `below_any'
    replace `g'FirstHighest = 1 if `above_all'
}

******** generate a dummy for the second highest ancestry ********

* <Group>SecondHighest is 1 when exactly one other group has a strictly larger
* single-ancestry share and the group is strictly above the six remaining
* ones. As only the 1s are of interest, everything else is coded 0.
* For each candidate "leader" one condition is built and applied; this yields
* the same seven conditions per group that were previously written out by hand.
local anc_groups German American English French Irish Italian Polish Scan

foreach g of local anc_groups {
    generate `g'SecondHighest = 0

    foreach leader of local anc_groups {
        if "`leader'" != "`g'" {
            * below the leader ...
            local is_second `g'_share_single < `leader'_share_single

            * ... and above every remaining group
            foreach o of local anc_groups {
                if "`o'" != "`g'" & "`o'" != "`leader'" {
                    local is_second `is_second' & `g'_share_single > `o'_share_single
                }
            }

            replace `g'SecondHighest = 1 if `is_second'
        }
    }
}




******** NEW DOMINANCE MEASURE ********

* d = s * (s / r), where s is the normalised share (<group>Sum100) of the group
* flagged as first highest and r the share of the group flagged as second highest.
* Counties without exactly one first/second flag combination keep dominance = .

gen dominance = .

local groups German American English French Irish Italian Polish Scan
foreach first of local groups {
	foreach second of local groups {
		* a group cannot be both first and second highest
		if "`first'" != "`second'" {
			replace dominance = `first'Sum100 * (`first'Sum100 / `second'Sum100) if `first'FirstHighest == 1 & `second'SecondHighest == 1
		}
	}
}


* Build three quantiles (terciles) of the dominance measure.
* pctile stores the two tercile cut-offs in the first observations of dom2.
* NOTE(review): the cut-offs below are hard-coded; presumably they were read off
* dom2 -- they must be updated by hand if the input data change. TODO confirm.
pctile dom2 = dominance, nquantiles(3)
gen var2 = .
replace var2 = 1 if dominance <= .5397351
replace var2 = 2 if dominance > .5397351 & dominance <= 1.134175
* Stata treats numeric missing as larger than any number, so without the
* !missing() guard every county with no dominance score would land in tercile 3
* (and could then be flagged as "dominant" further below).
replace var2 = 3 if dominance > 1.134175 & !missing(dominance)
tab var2


* Ancestry dummies: is the respective ancestry dominant, i.e. the first highest
* ancestry in the county AND in the top tercile (var2 == 3) of the dominance measure?

	* German: 1 = German is first highest and in the top tercile, 0 = German is not first highest.
	* NOTE(review): counties where German is first highest but var2 is 1 or 2 stay
	* missing here, whereas the other ancestries code them 0 -- presumably intentional, confirm.
gen GerDominant = .
replace GerDominant = 0 if GermanFirstHighest == 0
replace GerDominant = 1 if GermanFirstHighest == 1 & var2 == 3

	* All other ancestries: 0 unless first highest and in the top tercile
	* (missing when the ancestry is first highest but var2 is missing).
local shortnames Ame Eng Fre Iri Ita Pol Sca
local longnames  American English French Irish Italian Polish Scan
forvalues k = 1/7 {
	local short : word `k' of `shortnames'
	local long  : word `k' of `longnames'
	gen `short'Dominant = .
	replace `short'Dominant = 0 if `long'FirstHighest == 0
	replace `short'Dominant = 0 if `long'FirstHighest == 1 & inlist(var2, 1, 2)
	replace `short'Dominant = 1 if `long'FirstHighest == 1 & var2 == 3
}





*********** CONTINUOUS GERMAN dominance measure ********

* Positive branch: German is the first highest ancestry -> s * (s / r), with r the
* share of the second highest ancestry.
* Negative branch: another ancestry is first highest -> minus that ancestry's share
* times (that share / German share).

gen GerDominance = .
local others American English French Irish Italian Polish Scan

* German is first highest
foreach grp of local others {
	replace GerDominance = GermanSum100 * (GermanSum100 / `grp'Sum100) if GermanFirstHighest == 1 & `grp'SecondHighest == 1
}

* German is not first highest
foreach grp of local others {
	replace GerDominance = (-1) * (`grp'Sum100 * (`grp'Sum100 / GermanSum100)) if GermanFirstHighest == 0 & `grp'FirstHighest == 1
}

sum GerDominance
sum GerDominance if GerDominance < 0





*********** CONTINUOUS AMERICAN dominance measure ********

* Same construction as the German measure, with American as the reference group:
* positive when American is first highest, negative when another ancestry is.

gen AmeDominance = .
local others German English French Irish Italian Polish Scan

* American is first highest
foreach grp of local others {
	replace AmeDominance = AmericanSum100 * (AmericanSum100 / `grp'Sum100) if AmericanFirstHighest == 1 & `grp'SecondHighest == 1
}

* American is not first highest
foreach grp of local others {
	replace AmeDominance = (-1) * (`grp'Sum100 * (`grp'Sum100 / AmericanSum100)) if AmericanFirstHighest == 0 & `grp'FirstHighest == 1
}

sum AmeDominance
sum AmeDominance if AmeDominance < 0
	


************** for ANCESTRIES in TOTAL *************

* Sum of the ten total-ancestry shares (missing as soon as one share is missing)
gen AncestryShareSum2 = German_ancestry_share + American_ancestry_share + African_ancestry_share + English_ancestry_share + French_ancestry_share + Irish_ancestry_share + Italian_ancestry_share + Polish_ancestry_share + Scandinavian_ancestry_share + Hispanic_ancestry_share

* Rescale each share so that the ten groups add up to 1 (0-1 scale, not 0-100%).
* targets = prefixes of the new <prefix>Sum100_2 variables,
* sources = prefixes of the matching <prefix>_ancestry_share inputs (same order).
local targets German American African English French Irish Italian Polish Scan Hisp
local sources German American African English French Irish Italian Polish Scandinavian Hispanic
forvalues k = 1/10 {
	local dst : word `k' of `targets'
	local src : word `k' of `sources'
	gen `dst'Sum100_2 = ((100 / AncestryShareSum2) * `src'_ancestry_share) / 100
}

* Sanity check: the rescaled shares should add up to 1; the helper is dropped right away
gen test2 = GermanSum100_2 + AmericanSum100_2 + EnglishSum100_2 + FrenchSum100_2 + IrishSum100_2 + ItalianSum100_2 + PolishSum100_2 + ScanSum100_2 + AfricanSum100_2 + HispSum100_2
drop test2


**************************************************
* Reshape the dataset from long into wide format *
**************************************************

* drop unimportant variables
drop germansingle - jewish

* reshape the format: one row per county (fips), one set of columns per election year
reshape wide state - HispSum100_2, j(yearvote) i(fips)

sort fips

* state, stateabb and county exist once per election year after the reshape; keep a
* single copy of each. The 1984 copy is the base, but it is empty for any county
* that has no 1984 row, so fill it from the later years before dropping them.
* Otherwise such counties would end up without state identifiers and would be
* pooled into one blank-stateabb group when votes are summed by stateabb below.
foreach id in state stateabb county {
	rename `id'1984 `id'
	foreach yr of numlist 1988(4)2016 {
		* missing() is true for numeric missing and for the empty string
		replace `id' = `id'`yr' if missing(`id')
		drop `id'`yr'
	}
}

*** generate swing county dummies ***

* 1 = county went from a Democratic plurality in 2012 to a Republican one in 2016,
* 0 = same party ahead in both years; everything else (including Rep -> Dem) stays missing.
* Stata treats numeric missing as larger than any number, so "a > b" is true when
* a is missing; the !missing() guard keeps counties with an incomplete vote record
* from being classified on the basis of a missing share.
gen swing_county_dummy2016 = . // dem to rep
replace swing_county_dummy2016 = 1 if dempercent2012 > reppercent2012 & reppercent2016 > dempercent2016 ///
	& !missing(dempercent2012, reppercent2012, dempercent2016, reppercent2016)
replace swing_county_dummy2016 = 0 if dempercent2012 > reppercent2012 & dempercent2016 > reppercent2016 ///
	& !missing(dempercent2012, reppercent2012, dempercent2016, reppercent2016)
replace swing_county_dummy2016 = 0 if reppercent2012 > dempercent2012 & reppercent2016 > dempercent2016 ///
	& !missing(dempercent2012, reppercent2012, dempercent2016, reppercent2016)


*** generate swing state dummies ***

sort stateabb // required by the "by stateabb:" prefix below

* For 2016 and 2012: add up the county votes within each state and flag which
* party received more votes (both flags stay missing when the totals are tied).
foreach yy in 16 12 {
	by stateabb: egen sumVotesDem`yy' = total(demvotes20`yy') // democrats
	by stateabb: egen sumVotesGop`yy' = total(repvotes20`yy') // republicans

	* dummy: democrats have won the state
	gen StateWinnerDem`yy' = (sumVotesDem`yy' > sumVotesGop`yy') if (sumVotesDem`yy' > sumVotesGop`yy') | (sumVotesGop`yy' > sumVotesDem`yy')
	* dummy: republicans have won the state
	gen StateWinnerRep`yy' = (sumVotesGop`yy' > sumVotesDem`yy') if (sumVotesDem`yy' > sumVotesGop`yy') | (sumVotesGop`yy' > sumVotesDem`yy')
}

* variable of interest: 1 = state won by the Democrats in 2012 but not in 2016,
* 0 = same party won in both years; all other cases stay missing
gen swing_state_dummy2016 = .
replace swing_state_dummy2016 = 1 if StateWinnerDem12 == 1 & StateWinnerDem16 == 0
replace swing_state_dummy2016 = 0 if (StateWinnerDem12 == 1 & StateWinnerDem16 == 1) | (StateWinnerRep12 == 1 & StateWinnerRep16 == 1)


** save the wide county file and empty the memory **
save "1984_2016_wide.dta", replace
clear


******************************************************************************
******************************************************************************
******************************************************************************


* import ICPSR data
use "ICPSR.dta", clear

* Translate the state code in V1 into statea (see the abbreviations per row).
* Each entry reads <V1 value>=<statea value>; V1 values that are not listed
* leave statea missing.
local crosswalk ///
	1=90 2=230 3=250 4=330 5=440 6=500                         /// CT ME MA NH RI VT
	11=100 12=340 13=360 14=420                                /// DE NJ NY PA
	21=170 22=180 23=260 24=390 25=550                         /// IL IN MI OH WI
	31=190 32=200 33=270 34=290 35=310 36=380 37=460           /// IA KS MN MO NE ND SD
	40=510 41=10 42=50 43=120 44=130 45=220 46=280 47=370 48=450 49=480 /// VA AL AR FL GA LA MS NC SC TX
	51=210 52=240 53=400 54=470 56=540                         /// KY MD OK TN WV
	61=40 62=80 63=160 64=300 65=320 66=350 67=490 68=560      /// AZ CO ID MT NV NM UT WY
	71=60 72=410 73=530                                        /// CA OR WA
	81=20 82=150 // AK HI

gen statea = .
foreach pair of local crosswalk {
	* split "<V1>=<statea>" into its two halves
	gettoken icpsr rest : pair, parse("=")
	gettoken eq nhgis : rest, parse("=")
	replace statea = `nhgis' if V1 == `icpsr'
}
tab statea, mis // check the result


**** GENERATE CONSISTENT FIPS ****

* Builds a numeric county identifier fips = <state digits><3 county digits> from
*   statea : state code created above (e.g. 90 = CT, 230 = ME)
*   V3     : county code
* Both codes carry a trailing digit that is stripped here.
* NOTE(review): presumably both are FIPS codes x 10 -- confirm against the codebook.

replace V3 = . if V3 == 9999 // set those to missing because they are not of interest


* for STATE
tostring statea, gen(stateaa)  // recode into a string variable
tab stateaa

gen stateas = length(stateaa) // length of the state string: 2 or 3 (1 if statea is missing, i.e. ".")
tab stateas, mis

* for COUNTY
tostring V3, gen(V3a)  // recode into a string variable
tab V3a

gen V3s = length(V3a) // length of the county string (1, 2, 3 or 4)
tab V3s, mis

replace V3s = . if V3a == "." // a missing county code has no length
tab V3s, mis


* set those counties whose county code (V3a) ends with 5, 1, 8, 7 or 9 to missing
* (V3s holds the position of the last character, so substr() from there is the last digit)
foreach digit in 5 1 8 7 9 {
	forvalues len = 1/4 {
		replace V3a = "." if substr(V3a, `len', .) == "`digit'" & V3s == `len'
	}
}

tab V3a, mis // check that no county code ending in one of these digits is left

* pad the state part with a 0 when the county code has fewer than 3 digits,
* so that the county part of the final fips always occupies three digits
replace stateaa = stateaa + "0" if stateas==2 & V3s==1
replace stateaa = stateaa + "0" if stateas==2 & V3s==2
replace stateaa = stateaa + "0" if stateas==3 & V3s==2
tab stateaa

* 4-digit county codes: drop the trailing 0 of the state code instead.
* The cut must depend on the length of the state code: keeping two characters only
* strips the 0 from 3-character codes ("230" -> "23"); 2-character codes
* ("10" for AL, "50" for AR, "60" for CA, "80" for CO, ...) were left untouched,
* which produced e.g. 10101 instead of 1101 for statea 10 / county code 1010.
replace stateaa = substr(stateaa, 1, stateas - 1) if V3s==4 & inlist(stateas, 2, 3)
tab stateaa

* remove the trailing digit of the county code
* NOTE(review): 1-digit county codes are kept as they are -- confirm this is intended
replace V3a = substr(V3a, 1, 2)  if V3s==1
replace V3a = substr(V3a, 1, 1)  if V3s==2
replace V3a = substr(V3a, 1, 2)  if V3s==3
replace V3a = substr(V3a, 1, 3)  if V3s==4
tab V3a

* Control length
gen V3astr = length(V3a)
tab V3astr              // 1, 2 or 3 characters

gen stateaastr = length(stateaa)
tab stateaastr          // 1 to 4 characters


* concatenate state and county part; no fips if either part is unknown
* (an unmapped state would otherwise yield strings such as ".1", which destring
* turns into the non-missing number 0.1)
gen fips = stateaa + V3a
replace fips = "." if V3a == "." | stateaa == "."
tab fips, mis

* generate numeric variable
destring fips, gen(fip)
replace fip = . if fips == "."
tab fip, mis

drop fips
rename fip fips
order fips V1 V3 stateaa
sort fips

* check for duplicates before and after removing rows without a fips
* (duplicates report only reports; it does not drop anything)
duplicates report fips
drop if fips == .
duplicates report fips

* save the dataset
save "ICPSR_G.dta", replace

******************************************************************************
******************************************************************************
******************************************************************************





**** MERGE the dataset with the CLASS and MIG data ****

* Start from the ICPSR file with the harmonised fips and add mig1910.dta and
* class2015.dta by county (fips).
* NOTE(review): all merges are m:m. If fips is not unique in both files, an m:m
* merge pairs rows by their order within fips rather than forming a proper join;
* if fips is unique on both sides, 1:1 would state the intent and fail loudly
* otherwise. TODO confirm uniqueness of fips in mig1910.dta and class2015.dta.

use "ICPSR_G.dta", clear

merge m:m fips using "mig1910.dta"
drop _merge

* presumably class2015.dta also contains a variable called year -- renaming the
* one already in memory keeps it from taking precedence in the merge; verify
rename year year_n
merge m:m fips using "class2015.dta"
drop _merge

* drop variables we do not need
* (the "a - b" ranges depend on the current variable order in memory)
drop V3 stateaa V3a V757 - gisjoin state - areame regio - me_e adnee001 - adnee008 adpie001 - adpim007
drop V4 - V316 // drop irrelevant election data
drop V317 - V377 // election data 1900 - 1910
drop V382 - V401 V406 - V427 V431 - V451 V455 - V476 // drop congress data & results of other parties




*** VARIABLE PREPARATION ***

	* CLASS 2015 DATA

	* EDUCATION:
* NoCollegeDegree = 100 * (admze002 + ... + admze020) / admze001
* (presumably the categories below a college degree over the total -- confirm against the codebook)
local below_degree admze002
forvalues k = 3/20 {
	local kk : display %03.0f `k'
	local below_degree `below_degree' + admze`kk'
}
gen NoCollegeDegree = (100 / admze001) * (`below_degree')
tab NoCollegeDegree, mis

	* INCOME:
* national median income (55,775) minus adnke001, so positive values mean the
* county lies below the national median (could use income related to poverty level)
gen IncomeDiffToNatMedianIncome = 55775 - adnke001
sum IncomeDiffToNatMedianIncome


	* MIG 1910 DATA

* 1910: birthplace counts as a percentage of a0e001
gen GermanBirthPlace1910 = (100 / a0e001) * (a4q008 + a4q001) // Germany & Austria
tab GermanBirthPlace1910, mis

gen IrishBirthPlace1910 = (100 / a0e001) * (a4q012)
tab IrishBirthPlace1910, mis

gen ItalianBirthPlace1910 = (100 / a0e001) * (a4q013)
tab ItalianBirthPlace1910, mis


*** 1912, 1916, 1920 and 1924 election results ***
* Each entry is "<raw variable> <new name>": values above 100 are set to missing
* (they cannot be percentages) and the variable gets a readable name.
foreach spec in "V378 DEM1912" "V379 REP1912" "V402 DEM1916" "V403 REP1916" "V428 DEM1920" "V429 REP1920" "V452 DEM1924" "V453 REP1924" "V454 PROG1924" {
	gettoken raw clean : spec
	replace `raw' = . if `raw' > 100
	rename `raw' `clean'
}



* add the wide 1984-2016 county election file by fips
* (nogenerate: no _merge variable is kept)
merge m:m fips using "1984_2016_wide.dta", nogenerate


* Variables of interest:

* change in the Republican vote share 2012 -> 2016 (Trump '16 - Romney '12)
gen DiffRepRep20162012 = reppercent2016 - reppercent2012

* change in the Republican vote share 2008 -> 2012
gen DiffRepRep20122008 = reppercent2012 - reppercent2008

* difference between the two changes
gen DifDifRepRep = DiffRepRep20162012 - DiffRepRep20122008

* gen swing variable
* 1 = county moved from a Democratic lead in 1916 to a Republican lead in 1920,
* 0 = same party ahead in both years; everything else stays missing.
* Vote shares above 100 were set to missing earlier, and Stata treats numeric
* missing as larger than any number, so "a > b" is true when a is missing.
* The !missing() guard keeps counties with an incomplete record unclassified.
gen SwingCountyDummy19201916 = . // DEM 1916 to REP 1920
replace SwingCountyDummy19201916 = 1 if DEM1916 > REP1916 & REP1920 > DEM1920 ///
	& !missing(DEM1916, REP1916, DEM1920, REP1920)
replace SwingCountyDummy19201916 = 0 if DEM1916 > REP1916 & DEM1920 > REP1920 ///
	& !missing(DEM1916, REP1916, DEM1920, REP1920)
replace SwingCountyDummy19201916 = 0 if REP1916 > DEM1916 & REP1920 > DEM1920 ///
	& !missing(DEM1916, REP1916, DEM1920, REP1920)

* save the dataset
save "DataAll.dta", replace


******************************************************************************
******************************************************************************
******************************************************************************



	* CCES Data: micro data
*https://dataverse.harvard.edu/dataset.xhtml?persistentId=doi%3A10.7910/DVN/GDF6Z0

** County-level extract that is merged onto the CCES respondents below **

* start from the full county file
use "DataAll.dta", clear

* keep the county id plus the dominance measures, the single-ancestry shares for
* 2016 (a variable range, so it depends on the variable order) and the swing state dummy
keep fips var22016 GerDominant2016  GerDominant2012 GerDominance2016 ///
	German_share_single2016 - Italian_share_single2016 ///
	swing_state_dummy2016

* store the extract
save "GerDominanceIndic.dta", replace


******************************************************************************
******************************************************************************
******************************************************************************

* open the CCES survey file
use "CCES16_Common_OUTPUT_Feb2018_VV.dta", clear

* the county id comes as a string: keep it as fips_str and create a numeric
* fips that matches the merge key of the county extract
rename countyfips fips_str
destring fips_str, gen(fips)
sort fips

* attach the county-level indicators to every respondent
* NOTE(review): many respondents share a county, so m:1 looks like the intended
* merge type -- confirm that fips is unique in GerDominanceIndic.dta
merge m:m fips using "GerDominanceIndic.dta"

* only matched observations can be used
keep if _merge == 3
drop _merge

order fips var22016 GerDominant2016


* Party identification (pid3):
* RepPID = 1 if pid3 == 2, 0 if pid3 is 1, 3 or 4; missing otherwise.
* The original condition "pid3 > 2 & pid3 < 4" only covered pid3 == 3, so
* pid3 == 4 was left missing, although only the "not sure" answers are meant to
* be missing and NoPID below codes pid3 == 4 as 0.
gen RepPID = .
replace RepPID = 1 if pid3 == 2
replace RepPID = 0 if inlist(pid3, 1, 3, 4)
tab RepPID, mis // missing are those who are "not sure"

* NoPID = 1 if pid3 == 3, 0 if pid3 is 1, 2 or 4; missing otherwise.
gen NoPID = .
replace NoPID = 1 if pid3 == 3
replace NoPID = 0 if inlist(pid3, 1, 2, 4)
tab NoPID, missing // missing are those who are "not sure"

* Attitudes: answer 2 is coded 1, answer 1 is coded 0, anything else stays missing
gen AgainstFreeTrade = (CC16_351B == 2) if inlist(CC16_351B, 1, 2)
tab AgainstFreeTrade, mis

* whether someone is AGAINST helping the UN
gen AgainstHelpUN = (CC16_414_6 == 2) if inlist(CC16_414_6, 1, 2)
tab AgainstHelpUN, mis


*** Generate vote choice dummies ***
tab CC16_410a CC16_326

* voted Trump vs Clinton: 1 if CC16_410a == 1, 0 if CC16_410a == 2, missing otherwise
gen VoteTrumpVsClinton = (CC16_410a == 1) if inlist(CC16_410a, 1, 2)
tab VoteTrumpVsClinton, mis
   
	   
* age in 2016, derived from the year of birth
tab birthyr, mis
gen age = 2016 - birthyr
tab age, mis


* recode gender from 1/2 to 0/1 (other values are left as they are)
replace gender = gender - 1 if inlist(gender, 1, 2)
tab gender, mis

* education: educ
* income: faminc (also: CC16_303 and CC16_304); codes 31 and 97 are set to missing
replace faminc = . if inlist(faminc, 31, 97)


* interaction terms needed later
gen interaction1 = NoPID*AgainstHelpUN
gen interaction2 = NoPID*AgainstFreeTrade


* save the individual-level dataset
save "IndividualData.dta", replace










